$:.unshift File.dirname(__FILE__), 'lib'
require 'html5'
require 'ostruct'
require 'optparse'

module HTML5::CLI

  # Parse the command-line switches in +argv+ and return the resulting
  # settings as an OpenStruct.
  #
  # argv - Array of command-line words. Recognised switches are removed from
  #        it in place (OptionParser#parse!); positional words are left behind.
  #
  # The returned struct carries: profile, time, error, encoding (booleans),
  # output (:html, :xml, :tree, :hilite or nil), treebuilder (String),
  # parsemethod (:parse or :parse_fragment), container (only set by
  # --fragment) and serializer (a Hash of serializer options).
  #
  # -h/--help prints the usage summary and exits.
  def self.parse_opts(argv)
    settings = OpenStruct.new(
      :profile     => false,
      :time        => false,
      :output      => :html,
      :treebuilder => 'simpletree',
      :error       => false,
      :encoding    => false,
      :parsemethod => :parse,
      :serializer  => {
        :encoding            => 'utf-8',
        :omit_optional_tags  => false,
        :inject_meta_charset => false
      }
    )

    cli = OptionParser.new

    # Emits a blank line followed by a section title in the help text.
    heading = lambda do |title|
      cli.separator ""
      cli.separator title
    end

    # Registers a switch whose value is stored verbatim in the serializer hash.
    serializer_option = lambda do |switch, text, key|
      cli.on(switch, text) { |value| settings.serializer[key] = value }
    end

    heading.call("Parse Options:")

    cli.on("-b", "--treebuilder NAME") { |name| settings.treebuilder = name }

    cli.on("-f", "--fragment CONTAINER", "Parse as a fragment") do |container|
      settings.parsemethod = :parse_fragment
      settings.container = container if container
    end

    heading.call("Filter Options:")

    serializer_option.call("--[no-]inject-meta-charset", "inject <meta charset>", :inject_meta_charset)
    serializer_option.call("--[no-]strip-whitespace", "strip unnecessary whitespace", :strip_whitespace)
    serializer_option.call("--[no-]sanitize", "escape unsafe tags", :sanitize)

    heading.call("Output Options:")

    cli.on("--tree", "output as debug tree") { settings.output = :tree }

    cli.on("-x", "--xml", "output as xml") do
      # XML output always goes through the rexml tree builder.
      settings.output = :xml
      settings.treebuilder = "rexml"
    end

    cli.on("--[no-]html", "Output as html") do |wanted|
      settings.output = wanted ? :html : nil
    end

    cli.on("--hilite", "Output as formatted highlighted code.") { settings.output = :hilite }

    cli.on("-e", "--error", "Print a list of parse errors") { |flag| settings.error = flag }

    heading.call("Serialization Options:")

    serializer_option.call("--[no-]omit-optional-tags", "Omit optional tags", :omit_optional_tags)
    serializer_option.call("--[no-]quote-attr-values", "Quote attribute values", :quote_attr_values)
    serializer_option.call("--[no-]use-best-quote-char", "Use best quote character", :use_best_quote_char)
    serializer_option.call("--quote-char C", "Use specified quote character", :quote_char)
    serializer_option.call("--[no-]minimize-boolean-attributes", "Minimize boolean attributes", :minimize_boolean_attributes)
    serializer_option.call("--[no-]use-trailing-solidus", "Use trailing solidus", :use_trailing_solidus)
    serializer_option.call("--[no-]escape-lt-in-attrs", "Escape less than signs in attribute values", :escape_lt_in_attrs)
    serializer_option.call("--[no-]escape-rcdata", "Escape rcdata element values", :escape_rcdata)

    heading.call("Other Options:")

    cli.on("-p", "--[no-]profile", "Profile the run") { |flag| settings.profile = flag }
    cli.on("-t", "--[no-]time", "Time the run") { |flag| settings.time = flag }
    cli.on("-c", "--[no-]encoding", "Print character encoding used") { |flag| settings.encoding = flag }

    cli.on_tail("-h", "--help", "Show this message") do
      puts cli
      exit
    end

    cli.parse!(argv)
    settings
  end

  # Turn the input argument given on the command line into something the
  # parser can consume.
  #
  # f - the input argument (a String), or nil when none was supplied.
  #
  # Returns
  #   * $stdin when f is '-'
  #   * the handle returned by open-uri when f starts with http:// or https://
  #   * an open File when f names a file that can be opened
  #   * f itself, unchanged, when opening raised a StandardError
  #
  # When f is nil/false a message is written to $stderr and the process
  # exits with status 1.
  def self.open_input f
    unless f
      $stderr.write("No filename provided. Use -h for help\n")
      exit(1)
    end

    begin
      if f.start_with?('http://', 'https://')
        require 'open-uri'
        URI.parse(f).open
      elsif f == '-'
        $stdin
      else
        # File.open rather than Kernel#open: the latter treats an argument
        # beginning with "|" as a command line and spawns a subprocess.
        File.open(f)
      end
    rescue StandardError
      # Best-effort: anything that cannot be opened is handed back as the
      # original String.
      # NOTE(review): presumably so that literal markup can be passed on the
      # command line -- confirm before tightening this rescue.
      f
    end
  end

  # Parse the input named by the last element of +args+ and print the result.
  #
  # opts - options object as returned by parse_opts; reads treebuilder,
  #        output, parsemethod, container, profile and time.
  # args - remaining command-line words; only args.last is used, as the
  #        input argument handed to open_input.
  #
  # With opts.profile the parse runs under the profiler, the profile is
  # printed to $stderr, and the document itself is not printed. With
  # opts.time the parse and print durations are reported after the output.
  def self.parse(opts, args)
    f = open_input args.last

    # Handles returned by open-uri respond to #charset; plain files, $stdin
    # and Strings do not. The block makes #charset yield nil (instead of
    # open-uri's "iso-8859-1" default for text/*) when the server declared no
    # charset, so the parser receives only an explicitly declared encoding.
    encoding = f.respond_to?(:charset) ? f.charset { nil } : nil

    require 'html5/treebuilders'
    treebuilder = HTML5::TreeBuilders[opts.treebuilder]

    if opts.output == :xml
      require 'html5/liberalxmlparser'
      parser = HTML5::XMLParser.new(:tree => treebuilder)
    else
      require 'html5/html5parser'
      parser = HTML5::HTMLParser.new(:tree => treebuilder)
    end

    # Argument list for the chosen parser method; fragment parsing also takes
    # the container element name (default 'div').
    parse_args =
      if opts.parsemethod == :parse
        [f, encoding]
      else
        [f, (opts.container || 'div'), encoding]
      end

    if opts.profile
      require 'profiler'
      Profiler__::start_profile
      parser.send(opts.parsemethod, *parse_args)
      Profiler__::stop_profile
      Profiler__::print_profile($stderr)
    elsif opts.time
      require 'time' # TODO: switch to benchmark
      started = Time.new
      document = parser.send(opts.parsemethod, *parse_args)
      parsed = Time.new
      print_output(parser, document, opts)
      printed = Time.new
      puts "\n\nRun took: #{parsed-started}s (plus #{printed-parsed}s to print the output)"
    else
      document = parser.send(opts.parsemethod, *parse_args)
      print_output(parser, document, opts)
    end
  end

  # Write the parse result to standard output in the format selected by
  # opts.output, optionally preceded by the detected character encoding and
  # followed by the list of parse errors.
  #
  # parser   - the parser that produced +document+; read for
  #            tokenizer.stream.char_encoding, tree and errors.
  # document - the value returned by the parser.
  # opts     - options object as returned by parse_opts; reads encoding,
  #            output, treebuilder, serializer and error.
  #
  # An opts.output value other than :xml, :html, :hilite or :tree prints no
  # document at all.
  def self.print_output(parser, document, opts)
    puts "Encoding: #{parser.tokenizer.stream.char_encoding}" if opts.encoding

    case opts.output
    when :xml
      print document
    when :html
      require 'html5/treewalkers'
      tokens = HTML5::TreeWalkers[opts.treebuilder].new(document)
      require 'html5/serializer'
      puts HTML5::HTMLSerializer.serialize(tokens, opts.serializer)
    when :hilite
      print document.hilite
    when :tree
      # A single node is wrapped so it can be iterated like a fragment list.
      document = [document] unless document.respond_to?(:each)
      document.each {|fragment| puts parser.tree.testSerializer(fragment)}
    end

    if opts.error
      # Each entry of parser.errors is destructured as
      # [position, error code, template variables].
      lines = parser.errors.map do |pos, errorcode, datavars|
        formatstring = HTML5::E[errorcode]
        unless formatstring
          # No message is registered for this code. The fallback template
          # refers to %(errorcode), so the code itself has to be supplied as
          # a template variable or the message would read: Unknown error "".
          formatstring = 'Unknown error "%(errorcode)"'
          datavars = (datavars ? datavars.to_a : []) + [['errorcode', errorcode.to_s]]
        end
        message = PythonicTemplate.new(formatstring).to_s(datavars)
        "Line #{pos[0]} Col #{pos[1]} " + message
      end
      $stdout.write("\nParse errors:\n" + lines.join("\n")+"\n")
    end
  end

  # Fills in Python-style "%(name)" placeholders in a format string.
  #
  # Substitution is done textually. The format string and the supplied
  # values are never evaluated as Ruby code, so text such as '#{...}' or
  # backslash sequences in the format is emitted literally, and values of
  # any type are accepted (converted with #to_s).
  class PythonicTemplate
    # Matches one placeholder and captures its name.
    PLACEHOLDER = /%\((\w+)\)/

    # format - String containing zero or more "%(name)" placeholders.
    #          It is copied, so later changes to the caller's string have
    #          no effect on the template.
    def initialize format
      @format = format.dup
      @values = {}
    end

    # Render the template.
    #
    # vars - optional collection yielding (name, value) pairs, e.g. a Hash;
    #        names may be Strings or Symbols.
    #
    # Values supplied in earlier calls on the same instance stay in effect
    # until overwritten. A placeholder with no value renders as an empty
    # string.
    #
    # Returns a new String.
    def to_s(vars=nil)
      vars.each {|var, value| @values[var.to_s] = value} if vars
      @format.gsub(PLACEHOLDER) { @values[$1].to_s }
    end
  end

  # Command-line entry point: parses the switches out of ARGV, then parses
  # and prints the input named by what is left in ARGV.
  def self.run
    settings = parse_opts(ARGV)
    parse(settings, ARGV)
  end
end
